library(tidyverse)
Warning: package ‘tidyverse’ was built under R version 4.0.5
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
-- Attaching packages --------------------------------------------------------------- tidyverse 1.3.1 --
v ggplot2 3.3.3     v purrr   0.3.4
v tibble  3.1.0     v dplyr   1.0.5
v tidyr   1.1.3     v stringr 1.4.0
v readr   1.4.0     v forcats 0.5.1
Warning: package ‘ggplot2’ was built under R version 4.0.4
Warning: package ‘tibble’ was built under R version 4.0.5
Warning: package ‘tidyr’ was built under R version 4.0.4
Warning: package ‘dplyr’ was built under R version 4.0.4
Warning: package ‘forcats’ was built under R version 4.0.4
-- Conflicts ------------------------------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(janitor)
Warning: package ‘janitor’ was built under R version 4.0.4

Attaching package: ‘janitor’

The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test
library(glue)

Attaching package: ‘glue’

The following object is masked from ‘package:dplyr’:

    collapse
theme_set(theme_light())
flat_transactions <- readxl::read_xls("data/HPSSA Dataset 1 - Number of residential property sales by MSOA/HPSSA Dataset 1 - Number of residential property sales by MSOA.xls", sheet = "1e", skip = 5) %>% 
  clean_names() %>% 
  
  # remove erroneous column created when reading xls file
  select(-x105)
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
Registered S3 methods overwritten by 'htmltools':
  method               from         
  print.html           tools:rstudio
  print.shiny.tag      tools:rstudio
  print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio
New names:
* `` -> ...105
leeds_flats <- flat_transactions %>% 
  filter(local_authority_name == "Leeds") %>% 
  pivot_longer(cols = year_ending_dec_1995:year_ending_sep_2020, 
               names_to = "date", 
               values_to = "num_transactions") %>% 
  mutate(date = str_replace(date, "year_ending_", "")) %>% 
  separate(date, into = c("month", "year"), sep = "_") %>% 
  mutate(date = lubridate::my(glue("{month} {year}")))

leeds_flats_plotting <- leeds_flats %>% 
  filter(date >= lubridate::my("jan 2017")) %>% 
  group_by(msoa_code) %>% 
  mutate(ave_num_transactions = mean(num_transactions),
         is_high_num_transactions = ave_num_transactions > 100) %>% 
  ungroup()

ggplot(leeds_flats_plotting, 
       aes(date, num_transactions, 
           group = msoa_name,
           colour = is_high_num_transactions)) +
  geom_line() +
  geom_label(data = leeds_flats_plotting %>% 
                      filter(date == max(date) &
                               is_high_num_transactions == TRUE),
             mapping = aes(label = msoa_name),
             nudge_x = 200) +
  theme_minimal()


leeds_flats_plotting %>% 
  filter(is_high_num_transactions) %>% 
  distinct(msoa_name)
leeds_flats_plotting %>% 
  group_by(date, is_high_num_transactions) %>% 
  summarise(leeds_num_transactions = sum(num_transactions)) %>% 
  
  ggplot(mapping = aes(date, leeds_num_transactions, colour = is_high_num_transactions)) +
  geom_line()
`summarise()` has grouped output by 'date'. You can override using the `.groups` argument.

p <- leeds_flats_plotting %>% 
  filter(msoa_name == "Leeds 111") %>% 
  
  ggplot(mapping = aes(date, num_transactions)) +
  geom_point() +
  geom_line() +
  labs(title = "Leeds City Centre")

plotly::ggplotly(p)
all_housing_transactions <-  readxl::read_xls("data/HPSSA Dataset 1 - Number of residential property sales by MSOA/HPSSA Dataset 1 - Number of residential property sales by MSOA.xls", sheet = "1a", skip = 5) %>% 
  clean_names() %>% 
  
  # remove erroneous column created when reading xls file
  select(-x105)
New names:
* `` -> ...105
leeds_all <- all_housing_transactions %>% 
  filter(local_authority_name == "Leeds") %>% 
  pivot_longer(cols = year_ending_dec_1995:year_ending_sep_2020, 
               names_to = "date", 
               values_to = "num_transactions") %>% 
  mutate(date = str_replace(date, "year_ending_", "")) %>% 
  separate(date, into = c("month", "year"), sep = "_") %>% 
  mutate(date = lubridate::my(glue("{month} {year}")))

leeds_flats_focus <- leeds_flats %>% 
  select(msoa_code, msoa_name, date,
         num_flats_trans = num_transactions)

leeds_all_focus <- leeds_all %>% 
  select(msoa_code, msoa_name, date,
         num_all_trans = num_transactions)
  
leeds_all_plotting <- leeds_all_focus %>% 
  left_join(leeds_flats_focus) %>% 
  mutate(num_non_flats_trans = num_all_trans - num_flats_trans) %>% 
  pivot_longer(num_all_trans:num_non_flats_trans, names_to = "trans_type", values_to = "num_trans")
Joining, by = c("msoa_code", "msoa_name", "date")
leeds_all_plotting %>% 
  filter(date >= lubridate::my("jan 2017")) %>%
  group_by(date, trans_type) %>% 
  summarise(leeds_num_transactions = sum(num_trans)) %>% 
  
  ggplot(mapping = aes(date, leeds_num_transactions, colour = trans_type)) +
  geom_line()
`summarise()` has grouped output by 'date'. You can override using the `.groups` argument.

closest_pre_gren <- leeds_all_plotting$date == lubridate::my("jun 2017")
pre_gren_flats_trans <- leeds_all_plotting[closest_pre_gren, 'num_trans']

pre_grenf_trans <- leeds_all_plotting %>% 
  filter(date == lubridate::my("jun 2017")) %>% 
  group_by(trans_type) %>% 
  summarise(num_trans = sum(num_trans))

pre_grenf_trans

pre_gref_all <- pre_grenf_trans[[1,2]]
pre_gref_flats <- pre_grenf_trans[[2,2]]
pre_gref_non_flats <- pre_grenf_trans[[3,2]]

p1 <- leeds_all_plotting %>% 
  group_by(date, trans_type) %>%
  summarise(num_trans = sum(num_trans)) %>% 
  pivot_wider(names_from = trans_type, values_from = num_trans) %>%
  mutate(pc_all = ((num_all_trans - pre_gref_all)/ pre_gref_all) * 100,
         pc_flats = ((num_flats_trans - pre_gref_flats)/ pre_gref_flats) * 100,
         pc_non_flats = ((num_non_flats_trans - pre_gref_non_flats) / pre_gref_non_flats) * 100
         ) %>% 
  filter(date >= lubridate::my("may 2017")) %>% 
  
  pivot_longer(cols = pc_all:pc_non_flats, 
               names_to = "trans_type",
               values_to = "pc_in_trans") %>% 
  
  ggplot(aes(date, pc_in_trans, colour = trans_type)) +
  geom_point() +
  geom_line()
`summarise()` has grouped output by 'date'. You can override using the `.groups` argument.
  
plotly::ggplotly(p1)   
LS0tDQp0aXRsZTogIkZsYXQgUHJpY2VzIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KDQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShqYW5pdG9yKQ0KbGlicmFyeShnbHVlKQ0KDQp0aGVtZV9zZXQodGhlbWVfbGlnaHQoKSkNCmBgYA0KDQpgYGB7cn0NCmZsYXRfdHJhbnNhY3Rpb25zIDwtIHJlYWR4bDo6cmVhZF94bHMoImRhdGEvSFBTU0EgRGF0YXNldCAxIC0gTnVtYmVyIG9mIHJlc2lkZW50aWFsIHByb3BlcnR5IHNhbGVzIGJ5IE1TT0EvSFBTU0EgRGF0YXNldCAxIC0gTnVtYmVyIG9mIHJlc2lkZW50aWFsIHByb3BlcnR5IHNhbGVzIGJ5IE1TT0EueGxzIiwgc2hlZXQgPSAiMWUiLCBza2lwID0gNSkgJT4lIA0KICBjbGVhbl9uYW1lcygpICU+JSANCiAgDQogICMgcmVtb3ZlIGVycm9uZW91cyBjb2x1bW4gY3JlYXRlZCB3aGVuIHJlYWRpbmcgeGxzIGZpbGUNCiAgc2VsZWN0KC14MTA1KQ0KDQpsZWVkc19mbGF0cyA8LSBmbGF0X3RyYW5zYWN0aW9ucyAlPiUgDQogIGZpbHRlcihsb2NhbF9hdXRob3JpdHlfbmFtZSA9PSAiTGVlZHMiKSAlPiUgDQogIHBpdm90X2xvbmdlcihjb2xzID0geWVhcl9lbmRpbmdfZGVjXzE5OTU6eWVhcl9lbmRpbmdfc2VwXzIwMjAsIA0KICAgICAgICAgICAgICAgbmFtZXNfdG8gPSAiZGF0ZSIsIA0KICAgICAgICAgICAgICAgdmFsdWVzX3RvID0gIm51bV90cmFuc2FjdGlvbnMiKSAlPiUgDQogIG11dGF0ZShkYXRlID0gc3RyX3JlcGxhY2UoZGF0ZSwgInllYXJfZW5kaW5nXyIsICIiKSkgJT4lIA0KICBzZXBhcmF0ZShkYXRlLCBpbnRvID0gYygibW9udGgiLCAieWVhciIpLCBzZXAgPSAiXyIpICU+JSANCiAgbXV0YXRlKGRhdGUgPSBsdWJyaWRhdGU6Om15KGdsdWUoInttb250aH0ge3llYXJ9IikpKQ0KDQpsZWVkc19mbGF0c19wbG90dGluZyA8LSBsZWVkc19mbGF0cyAlPiUgDQogIGZpbHRlcihkYXRlID49IGx1YnJpZGF0ZTo6bXkoImphbiAyMDE3IikpICU+JSANCiAgZ3JvdXBfYnkobXNvYV9jb2RlKSAlPiUgDQogIG11dGF0ZShhdmVfbnVtX3RyYW5zYWN0aW9ucyA9IG1lYW4obnVtX3RyYW5zYWN0aW9ucyksDQogICAgICAgICBpc19oaWdoX251bV90cmFuc2FjdGlvbnMgPSBhdmVfbnVtX3RyYW5zYWN0aW9ucyA+IDEwMCkgJT4lIA0KICB1bmdyb3VwKCkNCg0KZ2dwbG90KGxlZWRzX2ZsYXRzX3Bsb3R0aW5nLCANCiAgICAgICBhZXMoZGF0ZSwgbnVtX3RyYW5zYWN0aW9ucywgDQogICAgICAgICAgIGdyb3VwID0gbXNvYV9uYW1lLA0KICAgICAgICAgICBjb2xvdXIgPSBpc19oaWdoX251bV90cmFuc2FjdGlvbnMpKSArDQogIGdlb21fbGluZSgpICsNCiAgZ2VvbV9sYWJlbChkYXRhID0gbGVlZHNfZmxhdHNfcGxvdHRpbmcgJT4lIA0KICAgICAgICAgICAgICAgICAgICAgIGZpbHRlcihkYXRlID09IG1heChkYXRlKSAmDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgaXNfaGlnaF9udW1fdHJhbnNhY3Rpb25zID09IFRSVUUpLA0KICAgICAgICAgICAgIG1hcHBpbmcgPSBhZXMobGFiZWwgPSBtc29hX25hbWUpLA0KICAgICAgICAgICAgIG51ZGdlX3ggPSAyMDApICsNCiAgdGhlbWVfbWluaW1hbCgpDQoNCmxlZWRzX2ZsYXRzX3Bsb3R0aW5nICU+JSANCiAgZmlsdGVyKGlzX2hpZ2hfbnVtX3RyYW5zYWN0aW9ucykgJT4lIA0KICBkaXN0aW5jdChtc29hX25hbWUpDQpgYGANCmBgYHtyfQ0KbGVlZHNfZmxhdHNfcGxvdHRpbmcgJT4lIA0KICBncm91cF9ieShkYXRlLCBpc19oaWdoX251bV90cmFuc2FjdGlvbnMpICU+JSANCiAgc3VtbWFyaXNlKGxlZWRzX251bV90cmFuc2FjdGlvbnMgPSBzdW0obnVtX3RyYW5zYWN0aW9ucykpICU+JSANCiAgDQogIGdncGxvdChtYXBwaW5nID0gYWVzKGRhdGUsIGxlZWRzX251bV90cmFuc2FjdGlvbnMsIGNvbG91ciA9IGlzX2hpZ2hfbnVtX3RyYW5zYWN0aW9ucykpICsNCiAgZ2VvbV9saW5lKCkNCmBgYA0KYGBge3J9DQpwIDwtIGxlZWRzX2ZsYXRzX3Bsb3R0aW5nICU+JSANCiAgZmlsdGVyKG1zb2FfbmFtZSA9PSAiTGVlZHMgMTExIikgJT4lIA0KICANCiAgZ2dwbG90KG1hcHBpbmcgPSBhZXMoZGF0ZSwgbnVtX3RyYW5zYWN0aW9ucykpICsNCiAgZ2VvbV9wb2ludCgpICsNCiAgZ2VvbV9saW5lKCkgKw0KICBsYWJzKHRpdGxlID0gIkxlZWRzIENpdHkgQ2VudHJlIikNCg0KcGxvdGx5OjpnZ3Bsb3RseShwKQ0KYGBgDQoNCg0KYGBge3J9DQphbGxfaG91c2luZ190cmFuc2FjdGlvbnMgPC0gIHJlYWR4bDo6cmVhZF94bHMoImRhdGEvSFBTU0EgRGF0YXNldCAxIC0gTnVtYmVyIG9mIHJlc2lkZW50aWFsIHByb3BlcnR5IHNhbGVzIGJ5IE1TT0EvSFBTU0EgRGF0YXNldCAxIC0gTnVtYmVyIG9mIHJlc2lkZW50aWFsIHByb3BlcnR5IHNhbGVzIGJ5IE1TT0EueGxzIiwgc2hlZXQgPSAiMWEiLCBza2lwID0gNSkgJT4lIA0KICBjbGVhbl9uYW1lcygpICU+JSANCiAgDQogICMgcmVtb3ZlIGVycm9uZW91cyBjb2x1bW4gY3JlYXRlZCB3aGVuIHJlYWRpbmcgeGxzIGZpbGUNCiAgc2VsZWN0KC14MTA1KQ0KDQpsZWVkc19hbGwgPC0gYWxsX2hvdXNpbmdfdHJhbnNhY3Rpb25zICU+JSANCiAgZmlsdGVyKGxvY2FsX2F1dGhvcml0eV9uYW1lID09ICJMZWVkcyIpICU+JSANCiAgcGl2b3RfbG9uZ2VyKGNvbHMgPSB5ZWFyX2VuZGluZ19kZWNfMTk5NTp5ZWFyX2VuZGluZ19zZXBfMjAyMCwgDQogICAgICAgICAgICAgICBuYW1lc190byA9ICJkYXRlIiwgDQogICAgICAgICAgICAgICB2YWx1ZXNfdG8gPSAibnVtX3RyYW5zYWN0aW9ucyIpICU+JSANCiAgbXV0YXRlKGRhdGUgPSBzdHJfcmVwbGFjZShkYXRlLCAieWVhcl9lbmRpbmdfIiwgIiIpKSAlPiUgDQogIHNlcGFyYXRlKGRhdGUsIGludG8gPSBjKCJtb250aCIsICJ5ZWFyIiksIHNlcCA9ICJfIikgJT4lIA0KICBtdXRhdGUoZGF0ZSA9IGx1YnJpZGF0ZTo6bXkoZ2x1ZSgie21vbnRofSB7eWVhcn0iKSkpDQoNCmxlZWRzX2ZsYXRzX2ZvY3VzIDwtIGxlZWRzX2ZsYXRzICU+JSANCiAgc2VsZWN0KG1zb2FfY29kZSwgbXNvYV9uYW1lLCBkYXRlLA0KICAgICAgICAgbnVtX2ZsYXRzX3RyYW5zID0gbnVtX3RyYW5zYWN0aW9ucykNCg0KbGVlZHNfYWxsX2ZvY3VzIDwtIGxlZWRzX2FsbCAlPiUgDQogIHNlbGVjdChtc29hX2NvZGUsIG1zb2FfbmFtZSwgZGF0ZSwNCiAgICAgICAgIG51bV9hbGxfdHJhbnMgPSBudW1fdHJhbnNhY3Rpb25zKQ0KICANCmxlZWRzX2FsbF9wbG90dGluZyA8LSBsZWVkc19hbGxfZm9jdXMgJT4lIA0KICBsZWZ0X2pvaW4obGVlZHNfZmxhdHNfZm9jdXMpICU+JSANCiAgbXV0YXRlKG51bV9ub25fZmxhdHNfdHJhbnMgPSBudW1fYWxsX3RyYW5zIC0gbnVtX2ZsYXRzX3RyYW5zKSAlPiUgDQogIHBpdm90X2xvbmdlcihudW1fYWxsX3RyYW5zOm51bV9ub25fZmxhdHNfdHJhbnMsIG5hbWVzX3RvID0gInRyYW5zX3R5cGUiLCB2YWx1ZXNfdG8gPSAibnVtX3RyYW5zIikNCg0KbGVlZHNfYWxsX3Bsb3R0aW5nICU+JSANCiAgZmlsdGVyKGRhdGUgPj0gbHVicmlkYXRlOjpteSgiamFuIDIwMTciKSkgJT4lDQogIGdyb3VwX2J5KGRhdGUsIHRyYW5zX3R5cGUpICU+JSANCiAgc3VtbWFyaXNlKGxlZWRzX251bV90cmFuc2FjdGlvbnMgPSBzdW0obnVtX3RyYW5zKSkgJT4lIA0KICANCiAgZ2dwbG90KG1hcHBpbmcgPSBhZXMoZGF0ZSwgbGVlZHNfbnVtX3RyYW5zYWN0aW9ucywgY29sb3VyID0gdHJhbnNfdHlwZSkpICsNCiAgZ2VvbV9saW5lKCkNCmBgYA0KDQpgYGB7cn0NCmNsb3Nlc3RfcHJlX2dyZW4gPC0gbGVlZHNfYWxsX3Bsb3R0aW5nJGRhdGUgPT0gbHVicmlkYXRlOjpteSgianVuIDIwMTciKQ0KcHJlX2dyZW5fZmxhdHNfdHJhbnMgPC0gbGVlZHNfYWxsX3Bsb3R0aW5nW2Nsb3Nlc3RfcHJlX2dyZW4sICdudW1fdHJhbnMnXQ0KDQpwcmVfZ3JlbmZfdHJhbnMgPC0gbGVlZHNfYWxsX3Bsb3R0aW5nICU+JSANCiAgZmlsdGVyKGRhdGUgPT0gbHVicmlkYXRlOjpteSgianVuIDIwMTciKSkgJT4lIA0KICBncm91cF9ieSh0cmFuc190eXBlKSAlPiUgDQogIHN1bW1hcmlzZShudW1fdHJhbnMgPSBzdW0obnVtX3RyYW5zKSkNCg0KcHJlX2dyZW5mX3RyYW5zDQoNCnByZV9ncmVmX2FsbCA8LSBwcmVfZ3JlbmZfdHJhbnNbWzEsMl1dDQpwcmVfZ3JlZl9mbGF0cyA8LSBwcmVfZ3JlbmZfdHJhbnNbWzIsMl1dDQpwcmVfZ3JlZl9ub25fZmxhdHMgPC0gcHJlX2dyZW5mX3RyYW5zW1szLDJdXQ0KDQpwMSA8LSBsZWVkc19hbGxfcGxvdHRpbmcgJT4lIA0KICBncm91cF9ieShkYXRlLCB0cmFuc190eXBlKSAlPiUNCiAgc3VtbWFyaXNlKG51bV90cmFucyA9IHN1bShudW1fdHJhbnMpKSAlPiUgDQogIHBpdm90X3dpZGVyKG5hbWVzX2Zyb20gPSB0cmFuc190eXBlLCB2YWx1ZXNfZnJvbSA9IG51bV90cmFucykgJT4lDQogIG11dGF0ZShwY19hbGwgPSAoKG51bV9hbGxfdHJhbnMgLSBwcmVfZ3JlZl9hbGwpLyBwcmVfZ3JlZl9hbGwpICogMTAwLA0KICAgICAgICAgcGNfZmxhdHMgPSAoKG51bV9mbGF0c190cmFucyAtIHByZV9ncmVmX2ZsYXRzKS8gcHJlX2dyZWZfZmxhdHMpICogMTAwLA0KICAgICAgICAgcGNfbm9uX2ZsYXRzID0gKChudW1fbm9uX2ZsYXRzX3RyYW5zIC0gcHJlX2dyZWZfbm9uX2ZsYXRzKSAvIHByZV9ncmVmX25vbl9mbGF0cykgKiAxMDANCiAgICAgICAgICkgJT4lIA0KICBmaWx0ZXIoZGF0ZSA+PSBsdWJyaWRhdGU6Om15KCJtYXkgMjAxNyIpKSAlPiUgDQogIA0KICBwaXZvdF9sb25nZXIoY29scyA9IHBjX2FsbDpwY19ub25fZmxhdHMsIA0KICAgICAgICAgICAgICAgbmFtZXNfdG8gPSAidHJhbnNfdHlwZSIsDQogICAgICAgICAgICAgICB2YWx1ZXNfdG8gPSAicGNfaW5fdHJhbnMiKSAlPiUgDQogIA0KICBnZ3Bsb3QoYWVzKGRhdGUsIHBjX2luX3RyYW5zLCBjb2xvdXIgPSB0cmFuc190eXBlKSkgKw0KICBnZW9tX3BvaW50KCkgKw0KICBnZW9tX2xpbmUoKQ0KICANCnBsb3RseTo6Z2dwbG90bHkocDEpICAgDQpgYGANCg0KYGBge3J9DQoNCmBgYA0KDQo=